# -*-coding:utf-8-*-
import re
import logging

from urllib.error import HTTPError
from urllib.request import urlopen
from urllib.parse import urlencode, quote
from bs4 import BeautifulSoup
from common import config
from common import sqlite

__author__ = 'Jason'

# Initialize the logging module: records at INFO level and above are written
# to 'cate.log', which is opened with filemode='w' (truncated on each run).
logging.basicConfig(level=logging.INFO,
                    format='%(asctime)s %(filename)s[line:%(lineno)d] %(levelname)s %(message)s',
                    datefmt='%a, %d %b %Y %H:%M:%S',
                    filename='cate.log',
                    filemode='w')

# Query category information by recursive crawling (not recommended).
def query():
    """Crawl categories with buildCateInfo() and store them via sqlite.insert().

    Rebinds the module-level ``globalCates`` to a fresh, empty dict, lets
    buildCateInfo() populate it (code -> title) starting from its default
    URL, and then passes the collected dict to ``sqlite.insert``.
    """
    global globalCates
    globalCates = dict()
    buildCateInfo()
    sqlite.insert(globalCates)

# Query category information by looping over sequential page ids.
def flatQuery(offset=0, count=41836):
    """Crawl ``count`` consecutive category pages, starting at id ``offset``.

    For every id in ``offset .. offset + count - 1`` the page URL is built
    from ``config.cate_api1`` and handed to queryOne(), which fetches the
    page, stores the categories found on it via ``sqlite.insert`` and, on
    any failure, prints the error and logs the URL instead of raising.

    Each page is stored on its own. The previous implementation kept one
    dict for the whole run and re-inserted every earlier page's categories
    after each new page.

    Args:
        offset: First page id to request; raise it to resume a crawl.
        count: Number of consecutive page ids to request.
    """
    for page_id in range(offset, offset + count):
        # Build the URL before delegating so that a failing page is always
        # reported under its own URL.
        queryOne(config.cate_api1.format(page_id))

# Process a single category-list URL.
def queryOne(url):
    """Fetch one category-list page and store the categories it contains.

    The page is expected to contain ``<ul id="list">``; for each ``<li>``
    in it, the text of ``<span class="code">`` is used as the key and the
    text of the first ``<a>`` as the value. The resulting dict is passed to
    ``sqlite.insert``. Pages without the list, or with an empty list, are
    silently skipped.

    This is best-effort: any exception raised while fetching, parsing or
    storing is printed, the URL is logged at INFO level, and nothing is
    propagated to the caller.

    Args:
        url: Address of the page to fetch (5 second timeout).
    """
    try:
        # Use a context manager so the HTTP response is always closed.
        with urlopen(url, timeout=5) as response:
            page = BeautifulSoup(response.read(), "html.parser")

        listing = page.find("ul", {"id": "list"})
        if listing is None:
            return

        entries = listing.find_all("li")
        if not entries:
            return

        found = {}
        for entry in entries:
            code_tag = entry.find("span", {"class": "code"})
            link = entry.find("a")
            # Skip just this <li> when it lacks a code span or a link,
            # instead of letting an AttributeError discard the whole page.
            if code_tag is None or link is None:
                continue

            code = code_tag.get_text()
            if code:
                found[code] = link.get_text()

        sqlite.insert(found)
        print("proccessing one dict")
    except Exception as e:
        print(e)
        # The log message is the bare URL so failed pages can be identified.
        logging.info(url)

# Fetch and parse a category list page (not recommended).
def getListHtml(url):
    """Download ``url`` and return it parsed as a BeautifulSoup document.

    Args:
        url: Address of the page to fetch (5 second timeout).

    Returns:
        The page parsed by BeautifulSoup with the "html.parser" backend.

    Raises:
        Nothing is caught here: errors from ``urlopen`` or from reading the
        response propagate to the caller.
    """
    # Close the HTTP response as soon as its body has been read.
    with urlopen(url, timeout=5) as response:
        return BeautifulSoup(response.read(), "html.parser")

# Build category information by following links recursively (not recommended).
def buildCateInfo(url=config.cate_api1.format(1)):
    """Recursively collect categories into the module-level ``globalCates``.

    Fetches ``url`` with getListHtml(), reads every ``<li>`` of
    ``<ul id="list">``, records ``code -> title`` in ``globalCates`` and
    then recurses into the link of each newly recorded category.

    ``globalCates`` must already exist as a dict (query() creates it);
    calling this function before that raises ``NameError``.

    Args:
        url: Page to start from. The default is computed once, when the
            function is defined, from ``config.cate_api1``.
    """
    # Imported locally because the file's top-level imports do not provide it.
    from urllib.parse import urljoin

    # Stop fetching further pages once more than 100 categories are collected.
    if len(globalCates) > 100:
        return

    try:
        html = getListHtml(url)
    except Exception as e:
        logging.info(url)
        print(e)
        return

    ul = html.find("ul", {"id": "list"})
    if ul is None:
        return

    for item in ul.find_all("li"):
        code_tag = item.find("span", {"class": "code"})
        if code_tag is None:
            # Malformed entry: skip it rather than crash the whole crawl.
            continue

        code = code_tag.get_text()
        # Codes that sort at or below "Z" as strings are treated as already
        # processed (the printed message says so); this also filters out
        # empty codes, since "" <= "Z".
        if code <= "Z":
            print("--已经处理过--")
            continue

        # A code recorded earlier in this crawl has already been followed;
        # skipping it prevents endless recursion on pages that link back.
        if code in globalCates:
            continue

        info = item.find("a")
        if info is None:
            continue

        globalCates[code] = info.get_text()
        print(code)

        href = info.get("href")
        if not href:
            # A link without a target cannot be followed.
            continue
        # Resolve relative links against the current page before recursing.
        buildCateInfo(urljoin(url, href))



